# Run preamble
source("code/preamble.R")

# Read the serialized data set for each of the five studies from data/
study1_data <- readRDS(file = "data/study1_data.rds")
study2_data <- readRDS(file = "data/study2_data.rds")
study3_data <- readRDS(file = "data/study3_data.rds")
study4_data <- readRDS(file = "data/study4_data.rds")
study5_data <- readRDS(file = "data/study5_data.rds")

# Stack the five study data sets row-wise, then keep only the demographic
# columns plus the study identifier
bound_data <- list(
  study1_data,
  study2_data,
  study3_data,
  study4_data,
  study5_data
) %>%
  bind_rows() %>%
  select(age_factor, gender, race, latinx, edu_factor, study)

# Give the demographic columns the display names used in the final table
bound_data <- rename(
  bound_data,
  Age = age_factor, Gender = gender,
  Race = race, Hispanic = latinx,
  Education = edu_factor
)

# Build the sample demographics table: one row per attribute level and one
# column per study, with each cell holding the "Percent" piece parsed from
# the factor.percents column returned by percent_skim().
# NOTE(review): percent_skim() is defined outside this file (presumably in the
# preamble); factor.percents is assumed to look like "Level: pct; Level: pct".
demo_table <- bound_data %>%
  # Looks like a no-op here: bound_data was already narrowed to six columns,
  # none of which start with "start_" -- TODO confirm before removing
  select(-starts_with("start_")) %>%
  group_by(study) %>% # One summary per study
  percent_skim() %>%
  as.data.frame() %>%
  # Keep only the needed columns, naming the variable column "Attribute"
  select(Attribute = skim_variable, study, factor.percents) %>%
  # Strip a trailing " (...)" qualifier from the study names
  mutate(study = str_remove(study, " \\(.*\\)")) %>%
  # One row per "Level: Percent" entry ...
  separate_longer_delim(factor.percents, delim = "; ") %>%
  # ... then split each entry into its two parts
  separate_wider_delim(
    factor.percents,
    delim = ": ",
    names = c("Level", "Percent")
  ) %>%
  # Spread the studies across columns
  pivot_wider(names_from = study, values_from = Percent) %>%
  # Show each attribute name only on its first row
  mutate(Attribute = ifelse(duplicated(Attribute), "", Attribute))

# Write the demographics table to tables/sample_demos.txt.
# The footnote is passed as `notes`, spelled out in full rather than relying
# on R's partial argument matching of `note` (assumes datasummary_df() is
# modelsummary's, whose parameter is named `notes` -- confirm in the preamble).
demo_table %>%
  datasummary_df(
    title = "Sample Demographics by Study",
    notes = "Note: We did not initially collect demographics in Study 2. 81% of the participants (n = 1,534) from Study 2 returned to participate in a follow-up survey between December 23 and January 6, 2022.",
    output = "tables/sample_demos.txt"
  )
